/*******************************************************************************
																				
	DESCRIPTION:  	This do file produces statistics about individuals with at
					least two spells in our sample.
	
*******************************************************************************/

clear all
global id_code 134
set seed 2110

* Set the number of bootstrap replications for SEs:
global n_bootstrap = 500

/*******************************************************************************
*	Create program for main stats
********************************************************************************/					   
									   
* Compute statistics:
cap program drop main_stats
program main_stats, eclass

	syntax [if]
	

	assert !missing(emplAft6M_0M_In2, p_emplAft6M_0M_In2, ///
			p_emplAft6M_0M_In_6M_Mod2, ///
			emplAft6M_0M_In1, p_emplAft6M_0M_In1)

	* Compute statistics for panel B (persistence across spells):
	* Actual JFR1 vs actual JFR2:
	qui corr emplAft6M_0M_In1 emplAft6M_0M_In2 `if', cov
	local v_pers = r(cov_12)
	local n = r(N)

	* Predicted JFR1 vs actual JFR2:
	qui corr p_emplAft6M_0M_In1 emplAft6M_0M_In2 `if', cov
	local v_obs_pers = r(cov_12)
	
	qui corr p_emplAft6M_0M_In1 p_emplAft6M_0M_In2 `if', cov
	local v2_obs_pers = r(cov_12)

	* Predicted JFR2 vs actual JFR2 (two-spell sample)
	qui corr p_emplAft6M_0M_In2 emplAft6M_0M_In2 `if', cov
	local v_obs = r(cov_12)
	local v2_obs = r(Var1)

	* Predicted JFR2 vs actual JFR2 (control sample):
	qui corr p_emplAft6M_0M_InC2 emplAft6M_0M_InC2 `if', cov
	local v_obs_control = r(cov_12)
	local v2_obs_control = r(Var1)

	* Implied other components:
	local v_unobs_pers = `v_pers' - `v_obs_pers'
	local v_obs_trans = `v_obs' - `v_obs_pers'
	local v = (`v_pers'/`v_obs_pers') * `v_obs'
	local v_control = (`v_pers'/`v_obs_pers') * `v_obs_control'	
	
	
	tempname b
	
	matrix `b' = [`v_pers', `v_obs_pers', `v_obs', `v_obs_control', `v_unobs_pers', `v_obs_trans', `v', `v_control', `n']
	matrix colnames `b' = v_pers v_obs_pers v_obs v_obs_control v_unobs_pers v_obs_trans v v_control n
	
	
	ereturn post `b', obs(`n')
	ereturn display

end

/*******************************************************************************
*	Create program for LTU stats
********************************************************************************/					   
								   
* Compute statistics:
cap program drop ltu_stats
program ltu_stats, eclass

	syntax [if/]
	
	if "`if'" != "" {
		local andif "& `if'"
		local if "if `if'"
	}
	

	assert !missing(emplAft6M_0M_In2, p_emplAft6M_0M_In2, ///
					 p_emplAft6M_0M_In_6M_Mod2, ///
					 emplAft6M_0M_In1, p_emplAft6M_0M_In1)

	* Actual JFR 6M into the first spell vs actual JFR 0M into the second spell:
	qui corr emplAft6M_6M_In1 emplAft6M_0M_In2 if emplAft6M_0M_In1 == 0 ///
												& !missing(emplAft6M_6M_In1, p_emplAft6M_6M_In1, ///
														   emplAft6M_0M_In2, p_emplAft6M_0M_In2, ///
														   p_emplAft6M_0M_In_6M_Mod2, ///
														   emplAft6M_0M_In1, p_emplAft6M_0M_In1) ///
												`andif', cov
	local v_pers = r(cov_12)
	local n = r(N)	

	* Predicted JFR 6M into the first spell vs actual JFR 0M into the second:
	qui corr p_emplAft6M_6M_In1 emplAft6M_0M_In2 if emplAft6M_0M_In1 == 0 ///
												& !missing(emplAft6M_6M_In1, p_emplAft6M_6M_In1, ///
														   emplAft6M_0M_In2, p_emplAft6M_0M_In2, ///
														   p_emplAft6M_0M_In_6M_Mod2, ///
														   emplAft6M_0M_In1, p_emplAft6M_0M_In1) ///
												`andif', cov
	local v_obs_pers = r(cov_12)

	* Predicted JFR2 at 6M vs actual JFR2 at 0M:
	qui corr p_emplAft6M_0M_In_6M_Mod2 emplAft6M_0M_In2 if emplAft6M_0M_In1 == 0 ///
												& !missing(emplAft6M_6M_In1, p_emplAft6M_6M_In1, ///
														   emplAft6M_0M_In2, p_emplAft6M_0M_In2, ///
														   p_emplAft6M_0M_In_6M_Mod2, ///
														   emplAft6M_0M_In1, p_emplAft6M_0M_In1) ///
												`andif', cov
	local v_obs = r(cov_12)
	
	* Predicted JFR2 at 6M vs actual JFR2 at 0M (Two-spell sample):
	qui corr p_emplAft6M_0M_In_6M_Mod2 emplAft6M_0M_In2 `if', cov
	local v_obs_twospell = r(cov_12)

	* Predicted JFR2 at 6M vs actual JFR2 at 0M (control sample):
	qui corr p_emplAft6M_0M_In_6M_ModC2 emplAft6M_0M_InC2 `if', cov
	local v_obs_control = r(cov_12)

	* Implied other components:
	local v_unobs_pers = `v_pers' - `v_obs_pers'
	local v_obs_trans = `v_obs' - `v_obs_pers'
	local v = (`v_pers'/`v_obs_pers') * `v_obs'
	local v_twospell = (`v_pers'/`v_obs_pers') * `v_obs_twospell'	
	local v_control = (`v_pers'/`v_obs_pers') * `v_obs_control'	
	
	tempname b
	
	matrix `b' = [`v_pers', `v_obs_pers', `v_obs', `v_obs_twospell', `v_obs_control', `v_unobs_pers', `v_obs_trans', `v' , `v_twospell', `v_control', `n']
	matrix colnames `b' = v_pers v_obs_pers v_obs v_obs_twospell v_obs_control v_unobs_pers v_obs_trans v v_twospell v_control n
	
	
	ereturn post `b', obs(`n')
	ereturn display

end

/*******************************************************************************
*	WRAPPER PROGRAM
********************************************************************************/
						   
* Main stats::
cap program drop twospell_analysis
program twospell_analysis, eclass

	syntax, [yeardiff(numlist >=0 integer)] [ltu] [iflist(string)] [ifnames(string)]
		
	preserve
	
		* Randomly sort the data and pick the first two spells for all individuals:
		sort LopNr_PersonNr InLnr
		gen double rand = runiform()

		sort LopNr_PersonNr rand

		by LopNr_PersonNr: gen n_spell = _n
		qui keep if n_spell <= 2

		drop rand

		* Transform to wide format:
		qui reshape wide InLnr n year startU trueEnd duration emplAft6M_0M_In ///
			p_emplAft6M_0M_In emplAft6M_6M_In p_emplAft6M_6M_In ///
			p_emplAft6M_0M_In_6M_Mod p_emplAft6M_0M_InC emplAft6M_0M_InC ///
			p_emplAft6M_0M_In_6M_ModC, ///
			i(LopNr_PersonNr) j(n_spell)
			
		* Exclusions: remove individuals who have the two spells in the same year, as well
		* as individuals who have overlapping spells:
		qui gen yeardiff = year2 - year1
		qui gen exactdiff = startU2 - startU1
		qui gen exactdiff_end_start = startU2 - trueEnd1

		qui drop if (startU2 > startU1) & (startU2 <= trueEnd1) | (startU1 > startU2) & (startU1 <= trueEnd2)
		qui drop if year2 == year1
		
		tempname b b1
		local first_yeardiff: word 1 of `yeardiff'
		
		if `"`iflist'"' == `""' {
			foreach i of numlist `yeardiff' {
								
				qui main_stats if abs(yeardiff) > `i'
				matrix `b1' = e(b)
				matrix coleq `b1' = main_`i'y
				if `i' == `first_yeardiff' {
					matrix `b' = `b1'
				}
				else {
					matrix `b' = [`b', `b1']
				}
					
				if "`ltu'" == "ltu" {	
					qui ltu_stats if abs(yeardiff) > `i'
					matrix `b1' = e(b)
					matrix coleq `b1' = ltu_`i'y
					matrix `b' = [`b', `b1']
				}
			}
		}
		else {
			local i = 1
			local iflist_iter `"`iflist'"'
			local ifnames_iter `"`ifnames'"'
			while `"`iflist_iter'"' != `""' & `"`iflist_iter'"' != `" "' {
			
				gettoken cond iflist_iter : iflist_iter
				gettoken eqname ifnames_iter : ifnames_iter
												
				qui main_stats if abs(yeardiff) > 0 & `cond'
				matrix `b1' = e(b)
				matrix coleq `b1' = `eqname'
				if `i' == 1 {
					matrix `b' = `b1'
				}
				else {
					matrix `b' = [`b', `b1']
				}
						
				local i = `i' + 1
			}
		}
		
		ereturn post `b'
		ereturn display
		
	restore

end


frame change default
use "${data}/${id_code}_MultipleSpellSample.dta", clear	 							 

* Compute all stats in one go:
set seed 2110
bootstrap, cluster(LopNr_PersonNr) reps(${n_bootstrap}):  twospell_analysis,  yeardiff(0 2 5) ltu
estimates save "${output}/${id_code}_TwoSpell_Analysis.ster", replace


/*******************************************************************************
*	SPLITTING BY BETA_D IN 2006
********************************************************************************/

use "${data}/119_3_DurationDependenceBetas_Full_2006.dta", clear

keep LopNr_PersonNr InLnr year beta_1_shrunk_ind

* Check that distinct spells by the same individuals have the same betaD:
bysort LopNr_PersonNr: egen temp = mean(beta_1_shrunk_ind)
assert temp == beta_1_shrunk_ind

* Drop duplicates:
drop InLnr temp
duplicates drop

* Rank by betaD:
cumul beta_1_shrunk_ind, gen(betaD_rank) equal
sort betaD_rank
gen betaD_above_med = betaD_rank > 0.5

* Save to frame:
frame copy default betaD, replace

* Import multiple spell data:
use "${data}/${id_code}_MultipleSpellSample.dta", clear

* Merge with betaD:
frlink m:1 LopNr_PersonNr, frame(betaD)
frget betaD_above_med beta_1_shrunk_ind, from(betaD)

* Compute statistics for all with a spell in 2006:
set seed 2110
bootstrap, cluster(LopNr_PersonNr) reps(${n_bootstrap}): twospell_analysis, ///
	iflist(`""!missing(betaD_above_med)" "betaD_above_med == 0" "betaD_above_med == 1""') ifnames("NonMissing BelowMedian AboveMedian")
estimates save "${output}/${id_code}_TwoSpell_Analysis_SplitByBetaD.ster", replace


/*******************************************************************************
*	Other useful statistics (only on the baseline samples)
********************************************************************************/

use "${data}/${id_code}_TwoSpellSample.dta", clear

* Ensure all variables are non-missing in multiple spell sample:
assert !missing(emplAft6M_0M_In2, p_emplAft6M_0M_In2, ///
				p_emplAft6M_0M_In_6M_Mod2, ///
				emplAft6M_0M_In1, p_emplAft6M_0M_In1)

* Create frame:
cap frame drop tabl
frame create tabl str30(sample variable) double(N E Var)

* Compute stats in control sample:
corr p_emplAft6M_0M_InC2 emplAft6M_0M_InC2 p_emplAft6M_0M_In_6M_ModC2, cov
local v_obs_control = r(cov_12)
matrix C_control = r(C)

foreach v in p_emplAft6M_0M_InC2 emplAft6M_0M_InC2 p_emplAft6M_0M_In_6M_ModC2 {
	sum `v'
	frame post tabl ("Reference Sample") ("`v'")  (r(N)) (r(mean)) (r(Var))
}
	
* Compute stats in Multiple Spell sample:
corr emplAft6M_0M_In1 p_emplAft6M_0M_In1 emplAft6M_0M_In2 p_emplAft6M_0M_In2 p_emplAft6M_0M_In_6M_Mod2, cov
matrix C_twospell = r(C)

foreach v in emplAft6M_0M_In1 p_emplAft6M_0M_In1 emplAft6M_0M_In2 p_emplAft6M_0M_In2 p_emplAft6M_0M_In_6M_Mod2 {
	sum `v'
	frame post tabl ("Multiple Spells Sample") ("`v'")  (r(N)) (r(mean)) (r(Var))
}

* Compute stats in Multiple Spell x LTU sample:
corr emplAft6M_6M_In1 p_emplAft6M_6M_In1 emplAft6M_0M_In2 p_emplAft6M_0M_In2 p_emplAft6M_0M_In_6M_Mod2 ///
	if emplAft6M_0M_In1 == 0 ///
		& !missing(emplAft6M_6M_In1, p_emplAft6M_6M_In1, ///
				   emplAft6M_0M_In2, p_emplAft6M_0M_In2, ///
				   p_emplAft6M_0M_In_6M_Mod2, ///
				   emplAft6M_0M_In1, p_emplAft6M_0M_In1), cov
matrix C_twospell_ltu = r(C)

foreach v in emplAft6M_6M_In1 p_emplAft6M_6M_In1 emplAft6M_0M_In2 p_emplAft6M_0M_In2 p_emplAft6M_0M_In_6M_Mod2 {
	sum `v' if emplAft6M_0M_In1 == 0 ///
		& !missing(emplAft6M_6M_In1, p_emplAft6M_6M_In1, ///
				   emplAft6M_0M_In2, p_emplAft6M_0M_In2, ///
				   p_emplAft6M_0M_In_6M_Mod2, ///
				   emplAft6M_0M_In1, p_emplAft6M_0M_In1)
	frame post tabl ("Multiple Spells, LTU Sample") ("`v'")  (r(N)) (r(mean)) (r(Var))
}

* Export summary statistics:
frame change tabl
	replace variable = "hat_F_0_t2" if inlist(variable, "p_emplAft6M_0M_InC2", "p_emplAft6M_0M_In2")
	replace variable = "F_0_t2" if inlist(variable, "emplAft6M_0M_InC2", "emplAft6M_0M_In2")
	replace variable = "hat_F_6_t2" if inlist(variable, "p_emplAft6M_0M_In_6M_ModC2", "p_emplAft6M_0M_In_6M_Mod2")
	replace variable = "hat_F_0_t1" if inlist(variable, "p_emplAft6M_0M_In1")
	replace variable = "F_0_t1" if inlist(variable, "emplAft6M_0M_In1")
	replace variable = "hat_F_6_t1" if inlist(variable, "p_emplAft6M_6M_In1")
	replace variable = "F_6_t1" if inlist(variable, "emplAft6M_6M_In1")

	export delimited using "${output}/${id_code}_MultipleSpells_SummaryStats.csv", replace

frame change default
* Export variance-covariance matrices:
frame tabl {
	clear 
	svmat C_control, names(col)
	rename (emplAft6M_0M_InC2 p_emplAft6M_0M_InC2 p_emplAft6M_0M_In_6M_ModC2) (F_0_t2 hat_F_0_t2 hat_F_6_t2)
	export delimited using "${output}/${id_code}_ReferenceSample_VCov.csv", replace

	clear
	svmat C_twospell, names(col)
	rename (emplAft6M_0M_In1 p_emplAft6M_0M_In1 emplAft6M_0M_In2 p_emplAft6M_0M_In2 p_emplAft6M_0M_In_6M_Mod2) (F_0_t1 hat_F_0_t1 F_0_t2 hat_F_0_t2 hat_F_6_t2)
	export delimited using "${output}/${id_code}_MultipleSpellsSample_VCov.csv", replace
	
	clear
	svmat C_twospell_ltu, names(col)
	rename (emplAft6M_6M_In1 p_emplAft6M_6M_In1 emplAft6M_0M_In2 p_emplAft6M_0M_In2 p_emplAft6M_0M_In_6M_Mod2) (F_6_t1 hat_F_6_t1 F_0_t2 hat_F_0_t2 hat_F_6_t2)
	export delimited using "${output}/${id_code}_MultipleSpellsxLTUSample_VCov.csv", replace
	
}

* Finally, merge with betaD again:
frlink 1:1 LopNr_PersonNr, frame(betaD)
frget betaD_above_med beta_1_shrunk_ind, from(betaD)

* Compute statistics of the job-finding rate in all three samples:
cap frame drop tabl
frame create tabl str50(sample variable) double(N E Var Med)
foreach v in emplAft6M_0M_In1 p_emplAft6M_0M_In1 emplAft6M_0M_In2 p_emplAft6M_0M_In2 p_emplAft6M_0M_In_6M_Mod2 beta_1_shrunk_ind {
	sum `v' if !missing(betaD_above_med), detail
	frame post tabl ("Individuals in the 2006 holdout sample") ("`v'")  (r(N)) (r(mean)) (r(Var)) (r(p50))
	
	sum `v' if betaD_above_med == 0, detail
	frame post tabl ("Below Median \beta_D(X)") ("`v'")  (r(N)) (r(mean)) (r(Var)) (r(p50))
	
	sum `v' if betaD_above_med == 1, detail
	frame post tabl ("Above Median \beta_D(X)") ("`v'")  (r(N)) (r(mean)) (r(Var)) (r(p50))
	
}

frame change tabl
	replace variable = "hat_F_0_t2" if inlist(variable, "p_emplAft6M_0M_InC2", "p_emplAft6M_0M_In2")
	replace variable = "F_0_t2" if inlist(variable, "emplAft6M_0M_InC2", "emplAft6M_0M_In2")
	replace variable = "hat_F_6_t2" if inlist(variable, "p_emplAft6M_0M_In_6M_ModC2", "p_emplAft6M_0M_In_6M_Mod2")
	replace variable = "hat_F_0_t1" if inlist(variable, "p_emplAft6M_0M_In1")
	replace variable = "F_0_t1" if inlist(variable, "emplAft6M_0M_In1")
	replace variable = "hat_F_6_t1" if inlist(variable, "p_emplAft6M_6M_In1")
	replace variable = "F_6_t1" if inlist(variable, "emplAft6M_6M_In1")
	replace variable = "beta_D(X)" if inlist(variable, "beta_1_shrunk_ind")

	export delimited using "${output}/${id_code}_MultipleSpells_SummaryStats_SplitByBetaD.csv", replace

frame change default

* Go to betaD frame and compute distributions:
frame change betaD
	frlink 1:1 LopNr_PersonNr, frame(default)
	gen matched = !missing(default)
	
	qui sum beta_1_shrunk_ind, detail
	local med_all: di %5.3f r(p50)
	
	qui sum beta_1_shrunk_ind if matched == 1, detail
	local med_2006: di %5.3f r(p50)
	
	qui sum beta_1_shrunk_ind if matched == 1 & betaD_above_med == 0, detail
	local med_2006_below: di %5.3f r(p50)
	
	qui sum beta_1_shrunk_ind if matched == 1 & betaD_above_med == 1, detail
	local med_2006_above: di %5.3f r(p50)

	* Density:
	twoway ///
		(kdensity beta_1_shrunk_ind, recast(area)  fcolor(ebblue%30) lcolor(ebblue)) ///
		(kdensity beta_1_shrunk_ind if matched == 1, fcolor(orange_red%30) lcolor(orange_red) recast(area)) ///
		, ///
		xline(`med_all', lpattern(dash) lcolor(black)) ///
		graphregion(color(white)) plotregion(margin(b=0 l=0))						///
		xtitle("{&beta}{sub:D}", size(9pt)) ytitle("Density") 			///
		xscale(titlegap(2)) yscale(titlegap(2))	xlabel(-0.075(0.025)0.075, axis(1) format(%5.3f)) ///
		ylabel(, angle(0) format(%9.0g)) ///
		legend(cols(2) order(1 "2006 Holdout Sample" 2 "2006 x Multiple Spell Sample") region(lwidth(none))) ///
		note("{bf:Median {&beta}{sub:D}}" "2006: `med_all'" "2006 x Mult. Spell: `med_2006'" " - Below 2006 med.: `med_2006_below'" " - Above 2006 med.: `med_2006_above'", ring(0) pos(2)) ///
		name(dist_betaD, replace)
	
	
	graph export "${output}/${id_code}_betaD_kdensity.pdf", as(pdf) replace

	
frame change default
	